#Load packages
pacman::p_load(plotly, ggstatsplot, tidyverse)
#Import data
flatprice <- read_csv("data/resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv", show_col_types = FALSE)Take-home Exercise 3
Visual Analytics of Resale Prices of Singapore Public Housing Properties
1. Overview
This exercise aims to uncover the salient patterns of the resale prices of public housing property by residential towns and estates in Singapore using appropriate analytical visualisation techniques. The visualization is designed using ggplot2, its extensions, and tidyverse packages.
The original dataset was downloaded from Data.gov.sg titled Resale flat princes based on registration date from Jan-2017 onwards.
The file downloaded was resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv
The focus of the study is on 3-ROOM, 4-ROOM and 5-ROOM types for 2022 period.
2. Data Preparation
#Data preparation
#filter for 2022 and 3-ROOM, 4-ROOM, 5-ROOM
#mutate remaining_lease to years
flatpriceclean <- flatprice |>
filter(flat_type %in% c('3 ROOM','4 ROOM','5 ROOM')) |>
mutate(year = as.integer(format(as.Date(paste(month, "-01", sep="")), "%Y")),
month = as.integer(format(as.Date(paste(month, "-01", sep="")), "%m")),
.before = 1)|>
filter(year == 2022) |>
mutate(remaining_lease_years = round((as.numeric(str_extract(remaining_lease, "^[0-9]+")) +
ifelse(is.na(as.numeric(str_extract(remaining_lease, " [0-9]+"))), 0, as.numeric(str_extract(remaining_lease, " [0-9]+")))/12), digits = 1),
resale_price_persqm = round(resale_price/floor_area_sqm, digits = 1),
lease_commence_date = as.integer(lease_commence_date),
.after = remaining_lease)
flatpriceclean# A tibble: 24,374 × 14
year month town flat_t…¹ block stree…² store…³ floor…⁴ flat_…⁵ lease…⁶
<int> <int> <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <int>
1 2022 1 ANG MO KIO 3 ROOM 320 ANG MO… 07 TO … 73 New Ge… 1977
2 2022 1 ANG MO KIO 3 ROOM 225 ANG MO… 07 TO … 67 New Ge… 1978
3 2022 1 ANG MO KIO 3 ROOM 331 ANG MO… 07 TO … 68 New Ge… 1981
4 2022 1 ANG MO KIO 3 ROOM 534 ANG MO… 07 TO … 82 New Ge… 1980
5 2022 1 ANG MO KIO 3 ROOM 578 ANG MO… 04 TO … 67 New Ge… 1980
6 2022 1 ANG MO KIO 3 ROOM 452 ANG MO… 01 TO … 83 New Ge… 1979
7 2022 1 ANG MO KIO 3 ROOM 560 ANG MO… 01 TO … 67 New Ge… 1980
8 2022 1 ANG MO KIO 3 ROOM 435 ANG MO… 04 TO … 67 New Ge… 1979
9 2022 1 ANG MO KIO 3 ROOM 435 ANG MO… 04 TO … 67 New Ge… 1979
10 2022 1 ANG MO KIO 3 ROOM 560 ANG MO… 10 TO … 67 New Ge… 1980
# … with 24,364 more rows, 4 more variables: remaining_lease <chr>,
# remaining_lease_years <dbl>, resale_price_persqm <dbl>, resale_price <dbl>,
# and abbreviated variable names ¹flat_type, ²street_name, ³storey_range,
# ⁴floor_area_sqm, ⁵flat_model, ⁶lease_commence_date
#Check for missing values
any(is.na(flatpriceclean))[1] FALSE
3. Visualization
Animation does not make sense here since this is not data evolution through time
#ggplot(flatpriceclean, aes(x = floor_area_sqm, y = resale_price,
# colour = flat_type)) +
# geom_point(alpha = 0.7,
# show.legend = FALSE) +
# scale_color_manual(values = c("red","blue","green"),
# label = flatpriceclean$flat_type) +
# labs(title = 'Lease Commence Year: {frame_time}',
# x = 'Floor Area (sqm)',
# y = 'Resale Price (SGD)') +
# transition_time(lease_commence_date) +
# ease_aes('linear') 3.1 Preliminary Visualization
The first plot purpose is to provide preliminary insight on the resale price of property vs remaining lease. the plot looks very cluttered as the number of dataset is high, however, this is deemed to be sufficient for preliminary analysis. Note that the resale price is normalized with floor area, as absolute resale price tends to be more expensive for bigger area.
The first plot design consideration :
Color legend for flat type (3 ROOM, 4 ROOM, 5 ROOM) in plotly allows users to filter accordingly
Bubble plot size is based on floor area
Hover tip displaying the resale price, floor area, remaining lease, and flat type
Show the code
plot_ly(data = flatpriceclean,
x = ~remaining_lease_years,
y = ~resale_price_persqm,
hovertemplate = ~paste("<br>Resale Price per sqm:", resale_price_persqm,
"<br>Floor Area (sqm):", floor_area_sqm,
"<br>Remaining Lease (Year):", remaining_lease_years),
type = 'scatter',
mode = 'markers',
size = ~floor_area_sqm,
sizes = c(5, 12.5),
color = ~flat_type,
marker = list(opacity = 0.6,
sizemode = 'diameter',
line = list(width = 0.2, color = '#FFFFFF'))) |>
layout(title = "Resale Price per flat area increases with remaining lease, 2022 transactions",
xaxis = list(title = "Remaining Lease (Year)"),
yaxis = list(title = "Resale Price per sqm (SGD)"),
legend = list(orientation = 'h',
xref = "paper",
yref = "paper",
xanchor = "center",
yanchor ="top",
x = 0.5,
y = 1))Using updatemenus to get a good first glance of all relationships
Show the code
flatpriceclean$flat_model <- fct_reorder(flatpriceclean$flat_model, flatpriceclean$resale_price_persqm)
plot_ly(data = flatpriceclean,
x = ~flat_type,
y = ~resale_price_persqm,
type = "box",
marker = list(symbol = "square-dot"),
boxmean = "sd") |>
layout(title = "Boxplot distribution resale price against other factors, 2022 transactions",
yaxis = list(title = "Resale Price per sqm (SGD)"),
updatemenus = list(list(type = 'dropdown',
xref = "paper",
yref = "paper",
xanchor = "left",
x = 0.04,
y = 1,
buttons = list(
list(method = "update",
args = list(list(x = list(flatpriceclean$flat_type)),
list(xaxis = list(title = "Flat Type")),
list(color = list(flatpriceclean$flat_type))),
label = "Flat Type"),
list(method = "update",
args = list(list(x = list(flatpriceclean$flat_model)),
list(xaxis = list(title = "Flat Model")),
list(color = list(flatpriceclean$flat_model))),
label = "Flat Model"),
list(method = "update",
args = list(list(x = list(flatpriceclean$storey_range)),
list(xaxis = list(title = "No of Storeys")),
list(color = list(flatpriceclean$storey_range))),
label = "No of Storeys"),
list(method = "update",
args = list(list(x = list(flatpriceclean$town)),
list(xaxis = list(title = "Town")),
list(color = list(flatpriceclean$town))),
label = "Town"),
list(method = "update",
args = list(list(x = list(flatpriceclean$month)),
list(xaxis = list(title = "Transaction Month")),
list(color = list(flatpriceclean$month))),
label = "Transaction Month")
)
)
)
)3.2 Statistical Visualization
The second plot is to investigate other factors that might impact the resale price.
Firstly, check the storey_range variables
#check number of observations per storey_range categories
table(flatpriceclean$storey_range)
01 TO 03 04 TO 06 07 TO 09 10 TO 12 13 TO 15 16 TO 18 19 TO 21 22 TO 24
4118 5453 5110 4499 2410 1176 506 340
25 TO 27 28 TO 30 31 TO 33 34 TO 36 37 TO 39 40 TO 42 43 TO 45 46 TO 48
264 168 103 102 77 29 11 5
49 TO 51
3
Show the code
ggbetweenstats(
data = flatpriceclean |>
mutate(storey_range = ifelse(storey_range %in% c("40 TO 42", "43 TO 45", "46 TO 48", "49 TO 51"), "40+", storey_range)),
x = storey_range,
y = resale_price_persqm,
xlab = "Number of Storeys",
ylab = "Resale Price per sqm (SGD)",
palette = "Paired",
title = "One-way ANOVA analysis reveals at least one significant difference in 2022 resale price across different storeys",
type = "np",
pairwise.comparisons = TRUE,
pairwise.display = "ns",
mean.ci = TRUE,
p.adjust.method = "fdr",
messages = FALSE
) 
Secondly, check the flat_model variables
Show the code
ggbetweenstats(
data = flatpriceclean,
x = flat_model,
y = resale_price_persqm,
xlab = "Flat Model",
ylab = "Resale Price per sqm (SGD)",
palette = "Set1",
type = "np",
pairwise.comparisons = FALSE,
mean.ci = TRUE,
p.adjust.method = "fdr",
messages = FALSE
) 
Filtering for number of observations >= 50
Show the code
flatpriceclean$flat_model <- fct_reorder(flatpriceclean$flat_model, flatpriceclean$resale_price_persqm)
ggbetweenstats(
data = flatpriceclean |>
group_by(flat_model) |>
filter(n() >= 50),
x = flat_model,
y = resale_price_persqm,
xlab = "Flat Model",
ylab = "Resale Price per sqm (SGD)",
palette = "Set1",
title = "One-way ANOVA analysis reveals at least one significant difference in 2022 resale price across different models",
type = "np",
pairwise.comparisons = TRUE,
pairwise.display = "ns",
mean.ci = TRUE,
p.adjust.method = "fdr",
messages = FALSE
) 
The town variables are skipped as there are too many variables -> to be considered in the final visualization
Lastly, check the transaction month variables
Show the code
ggbetweenstats(
data = flatpriceclean,
x = month,
y = resale_price_persqm,
xlab = "Month of Transaction",
ylab = "Resale Price per sqm (SGD)",
package = "dutchmasters",
palette = "milkmaid",
title = "One-way ANOVA analysis reveals at least one significant difference in 2022 resale price across different \ntransaction months",
type = "np",
pairwise.comparisons = TRUE,
pairwise.display = "ns",
mean.ci = TRUE,
p.adjust.method = "fdr",
messages = FALSE
) 
3.3 Final Visualization
Show the code
town_list <- list()
for (i in 1:length(unique(flatpriceclean$town))) {
town_list[[i]] <- list(method = "restyle",
args = list("transforms[0].value",
unique(flatpriceclean$town)[i]),
label = unique(flatpriceclean$town)[i])
}
town_list[[length(town_list) + 1]] <- list(method = "restyle",
args = list("transforms[0].value",
unique(flatpriceclean$town)),
label = "ALL")Show the code
flatpriceorder <- flatpriceclean[order(flatpriceclean$flat_type), ]
plot_ly(data = flatpriceclean,
x = ~remaining_lease_years,
y = ~resale_price_persqm,
hovertemplate = ~paste("<br>Resale Price per sqm:", resale_price_persqm,
"<br>Floor Area (sqm):", floor_area_sqm,
"<br>Remaining Lease (Year):", remaining_lease_years,
"<br>Town:", town),
type = 'scatter',
mode = 'markers',
size = ~floor_area_sqm,
sizes = c(5, 15),
color = ~factor(flat_type),
marker = list(opacity = 0.6,
sizemode = 'diameter',
line = list(width = 0.2, color = '#FFFFFF')),
transforms = list(list(type = 'filter',
target = ~flatpriceorder$town,
operation = '=',
value = unique(flatpriceorder$town)[1])
)
) |>
layout(title = "Resale Price per flat area increases with remaining lease, 2022 transactions",
xaxis = list(title = "Remaining Lease (Year)",
range = c(40, 100)),
yaxis = list(title = "Resale Price per sqm (SGD)",
range = c(3000, 16000)),
legend = list(orientation = 'h',
xref = "paper",
yref = "paper",
xanchor = "center",
yanchor ="top",
x = 0.5,
y = 1),
updatemenus = list(list(type = 'dropdown',
x = 1.6, y = 0.7,
buttons = town_list)
)
)